Overview

quick test to see what happens to distribution of env. variables if add more background points (is 10k enough or do I need more)

A note to anyone who might happen to stumble across this… I am a beginner in R and have had no exposure to similar languages. I don’t know what I’m doing. The code herein is unlikely to be elegant and there are probably more efficient ways of running the code.

Built with ‘r getRversion()’.

Package dependencies

You can load them using the following code which uses a function called ipak. Note this function checks to see if the packages are installed first. The “include=FALSE” supresses the package installation text appearing in the document…

load the raw background.csv file with all points to randomly extract points from (../output/env/unique_cell_centroid_lonlat_nafo2_depth.csv)

testbglist <- read.csv("../output/env/unique_cell_centroid_lonlat_nafo2_depth.csv", header = TRUE)
head(testbglist)
backobsno <- nrow(testbglist)
backobsno

the inefficent loop

This loop runs through the netcdf files and then looks for which rows in data_aea it should extract the value to point from, and at what depth (netcDF layer)

Start with 2007_10 data

strt <- Sys.time() #get the start time

xy <- testbglist[ ,c("longitude_","latitude_m")] # This is to tell R where the coordinates are. Note that the column order needs to be longitude, latitude
testbglistsp <- SpatialPointsDataFrame(coords = xy, data = testbglist, proj4string = CRS("+proj=aea +lat_1=50 +lat_2=70 +lat_0=40 +lon_0=-60 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs")) # The CRS is used here is for the albers equal area projection.


netcdf_list <- list.files("../data/env/bktstncdf", pattern = '*.nc', full.names = TRUE) #true means the full path is included
no_netcdf <- length(netcdf_list) #for the loop - need to know how many files to cycle through
netcdf_name <- list.files("../data/env/bktstncdf", pattern = '*.nc', full.names = FALSE) #false means the path is not included
aea <- raster("../output/env/aea.tif") 
yr <- 2007  # a variable for the observation year
mth <- 10  # a variable for the observation month

for (i in 1:no_netcdf) {  
  print(netcdf_name[i]) #this just prints the name of the netCDF R is working one
  brkyr <- as.integer(sapply(strsplit(netcdf_name[i], "_"), "[[", 1)) # extracting the first part of the netcdf filename (which is the year)
  brkmth <- as.integer(sapply(strsplit(netcdf_name[i], "_"), "[[", 2)) # extracting the second part of the netcdf filename (which is the month)
  brkvar <- (sapply(strsplit(netcdf_name[i], "_"), "[[", 3)) # extracting the third part of the netcdf (inc.nc)
  temp_brick <- brick(netcdf_list[i], lvar = 4)
  temp_brick <- projectRaster(temp_brick, aea) 
    for (j in 1:nrow(testbglistsp)) {  
      de <- testbglistsp$depthlayerno[[j]]  # a variable for the observation depth layer
          if (brkyr == yr & brkmth == mth & brkvar == "temp.nc"){
              testbglistsp$temp_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$temp_depth[j] <- NA
              } else  
                testbglistsp$temp_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ])
          } else if (brkyr == yr & brkmth == mth & brkvar == "salinity.nc") {
              testbglistsp$salinity_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$salinity_depth[j] <- NA
              } else  
                testbglistsp$salinity_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "chl.nc") {
              testbglistsp$chl_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$chl_depth[j] <- NA
              } else  
                testbglistsp$chl_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "o2.nc") {
              testbglistsp$o2_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$o2_depth[j] <- NA
              } else  
                testbglistsp$o2_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "mlp.nc") {
              testbglistsp$mlp_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ])
          } else if (brkyr == yr & brkmth == mth & brkvar == "ssh.nc") {
              testbglistsp$ssh_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
            
          }
     
    }
}
write.csv(testbglistsp, "../data/env/background_point_check/200710_allbackgroundpoints.csv", row.names = FALSE)
test_back_df <- as.data.frame(testbglistsp)
print(Sys.time()-strt) #time it took to run

ok now create the first lot of random for 2007_10

testbk10000 <- test_back_df[sample(nrow(test_back_df), 10000), ]  #where 10000 = number of rows to sample (large sample as per maxent)
testbk20000 <- test_back_df[sample(nrow(test_back_df), 20000), ]  
testbk50000 <- test_back_df[sample(nrow(test_back_df), 50000), ]  
testbk100000 <- test_back_df[sample(nrow(test_back_df), 100000), ]  
testbk190000 <- test_back_df[sample(nrow(test_back_df), 190000), ]  

plot each variable against the different no of background points

ggplot(testbk10000, aes(x = ssh_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Sea Surface Height Above Geoid (meters)")
dev.copy(png, "../output/env/background_point_check/200710_ssh_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = mlp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Mixed layer thickness (MLP) (meters)")
dev.copy(png,"../output/env/background_point_check/200710_mlp_back_no.png") # to automatically save the plot to a png AND show it inline 
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = temp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")  + labs(x = "Temperature at surface (kelvin)")
dev.copy(png,"../output/env/background_point_check/200710_temp_surface_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = temp_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Temperature at sampling depth (kelvin)")
dev.copy(png,"../output/env/background_point_check/200710_temp_depth_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = salinity_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Salinity at surface (kelvin)")
dev.copy(png,"../output/env/background_point_check/200710_salinity_surface_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = salinity_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Salinity at sampling depth (kelvin)")
dev.copy(png,"../output/env/background_point_check/200710_salinity_depth_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = chl_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Chlorophyll concentration at surface (mmol.m-3)")
dev.copy(png,"../output/env/background_point_check/200710_chl_surface_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = chl_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Chlorophyll concentration at sampling depth (mmol.m-3)")
dev.copy(png,"../output/env/background_point_check/200710_chl_depth_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = o2_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Dissolved oxygen concentration at surface (mmol.m-3)")
dev.copy(png,"../output/env/background_point_check/200710_o2_surface_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = o2_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Dissolved oxygen concentration at sampling depth (mmol.m-3)")
dev.copy(png,"../output/env/background_point_check/200710_o2_depth_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

Try again for another month - say 1999 02 AND again 2014 06

This time extract the values for all points and then subset

strt <- Sys.time() #get the start time

xy <- testbglist[ ,c("longitude_","latitude_m")] # This is to tell R where the coordinates are. Note that the column order needs to be longitude, latitude
testbglistsp <- SpatialPointsDataFrame(coords = xy, data = testbglist, proj4string = CRS("+proj=aea +lat_1=50 +lat_2=70 +lat_0=40 +lon_0=-60 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs")) # The CRS is used here is for the albers equal area projection.


netcdf_list <- list.files("../data/env/bktstncdf", pattern = '*.nc', full.names = TRUE) #true means the full path is included
no_netcdf <- length(netcdf_list) #for the loop - need to know how many files to cycle through
netcdf_name <- list.files("../data/env/bktstncdf", pattern = '*.nc', full.names = FALSE) #false means the path is not included
aea <- raster("../output/env/aea.tif") 
yr <- 1999  # a variable for the observation year
mth <- 02  # a variable for the observation month

for (i in 1:no_netcdf) {  
  print(netcdf_name[i]) #this just prints the name of the netCDF R is working one
  brkyr <- as.integer(sapply(strsplit(netcdf_name[i], "_"), "[[", 1)) # extracting the first part of the netcdf filename (which is the year)
  brkmth <- as.integer(sapply(strsplit(netcdf_name[i], "_"), "[[", 2)) # extracting the second part of the netcdf filename (which is the month)
  brkvar <- (sapply(strsplit(netcdf_name[i], "_"), "[[", 3)) # extracting the third part of the netcdf (inc.nc)
  temp_brick <- brick(netcdf_list[i], lvar = 4)
  temp_brick <- projectRaster(temp_brick, aea) 
    for (j in 1:nrow(testbglistsp)) {  
      de <- testbglistsp$depthlayerno[[j]]  # a variable for the observation depth layer
          if (brkyr == yr & brkmth == mth & brkvar == "temp.nc"){
              testbglistsp$temp_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$temp_depth[j] <- NA
              } else  
                testbglistsp$temp_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ])
          } else if (brkyr == yr & brkmth == mth & brkvar == "salinity.nc") {
              testbglistsp$salinity_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$salinity_depth[j] <- NA
              } else  
                testbglistsp$salinity_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "chl.nc") {
              testbglistsp$chl_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$chl_depth[j] <- NA
              } else  
                testbglistsp$chl_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "o2.nc") {
              testbglistsp$o2_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$o2_depth[j] <- NA
              } else  
                testbglistsp$o2_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "mlp.nc") {
              testbglistsp$mlp_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ])
          } else if (brkyr == yr & brkmth == mth & brkvar == "ssh.nc") {
              testbglistsp$ssh_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
            
          }
     
    }
}
write.csv(testbglistsp, "../data/env/background_point_check/199902_allbackgroundpoints.csv", row.names = FALSE)
test_back_df <- as.data.frame(testbglistsp)
print(Sys.time()-strt) #time it took to run

ok now create the first lot of random for 1999_02

testbk10000 <- test_back_df[sample(nrow(test_back_df), 10000), ]  #where 10000 = number of rows to sample (large sample as per maxent)
testbk20000 <- test_back_df[sample(nrow(test_back_df), 20000), ]  
testbk50000 <- test_back_df[sample(nrow(test_back_df), 50000), ]  
testbk100000 <- test_back_df[sample(nrow(test_back_df), 100000), ]  
testbk190000 <- test_back_df[sample(nrow(test_back_df), 190000), ]  

and plot

ggplot(testbk10000, aes(x = ssh_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Sea Surface Height Above Geoid (meters)")
dev.copy(png,"../output/env/background_point_check/199902_ssh_back.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

And now 2014_06

strt <- Sys.time() #get the start time

xy <- testbglist[ ,c("longitude_","latitude_m")] # This is to tell R where the coordinates are. Note that the column order needs to be longitude, latitude
testbglistsp <- SpatialPointsDataFrame(coords = xy, data = testbglist, proj4string = CRS("+proj=aea +lat_1=50 +lat_2=70 +lat_0=40 +lon_0=-60 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs")) # The CRS is used here is for the albers equal area projection.


netcdf_list <- list.files("../data/env/bktstncdf", pattern = '*.nc', full.names = TRUE) #true means the full path is included
no_netcdf <- length(netcdf_list) #for the loop - need to know how many files to cycle through
netcdf_name <- list.files("../data/env/bktstncdf", pattern = '*.nc', full.names = FALSE) #false means the path is not included
aea <- raster("../output/env/aea.tif") 
yr <- 2014  # a variable for the observation year
mth <- 06  # a variable for the observation month

for (i in 1:no_netcdf) {  
  print(netcdf_name[i]) #this just prints the name of the netCDF R is working one
  brkyr <- as.integer(sapply(strsplit(netcdf_name[i], "_"), "[[", 1)) # extracting the first part of the netcdf filename (which is the year)
  brkmth <- as.integer(sapply(strsplit(netcdf_name[i], "_"), "[[", 2)) # extracting the second part of the netcdf filename (which is the month)
  brkvar <- (sapply(strsplit(netcdf_name[i], "_"), "[[", 3)) # extracting the third part of the netcdf (inc.nc)
  temp_brick <- brick(netcdf_list[i], lvar = 4)
  temp_brick <- projectRaster(temp_brick, aea) 
    for (j in 1:nrow(testbglistsp)) {  
      de <- testbglistsp$depthlayerno[[j]]  # a variable for the observation depth layer
          if (brkyr == yr & brkmth == mth & brkvar == "temp.nc"){
              testbglistsp$temp_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$temp_depth[j] <- NA
              } else  
                testbglistsp$temp_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ])
          } else if (brkyr == yr & brkmth == mth & brkvar == "salinity.nc") {
              testbglistsp$salinity_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$salinity_depth[j] <- NA
              } else  
                testbglistsp$salinity_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "chl.nc") {
              testbglistsp$chl_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$chl_depth[j] <- NA
              } else  
                testbglistsp$chl_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "o2.nc") {
              testbglistsp$o2_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$o2_depth[j] <- NA
              } else  
                testbglistsp$o2_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "mlp.nc") {
              testbglistsp$mlp_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ])
          } else if (brkyr == yr & brkmth == mth & brkvar == "ssh.nc") {
              testbglistsp$ssh_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
            
          }
     
    }
}
write.csv(testbglistsp, "../data/env/background_point_check/201406_allbackgroundpoints.csv", row.names = FALSE)
test_back_df <- as.data.frame(testbglistsp)
print(Sys.time()-strt) #time it took to run

ok now create the first lot of random for 2014_06

testbk10000 <- test_back_df[sample(nrow(test_back_df), 10000), ]  #where 10000 = number of rows to sample (large sample as per maxent)
testbk20000 <- test_back_df[sample(nrow(test_back_df), 20000), ]  
testbk50000 <- test_back_df[sample(nrow(test_back_df), 50000), ]  
testbk100000 <- test_back_df[sample(nrow(test_back_df), 100000), ]  
testbk190000 <- test_back_df[sample(nrow(test_back_df), 190000), ]  

and plot

ggplot(testbk10000, aes(x = ssh_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_ssh.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(testbk10000, aes(x = mlp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_mlp.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(testbk10000, aes(x = temp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_temp_surface.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(testbk10000, aes(x = temp_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_temp_depth.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(testbk10000, aes(x = salinity_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_salinity_surface.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(testbk10000, aes(x = salinity_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_salinity_depth.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(testbk10000, aes(x = chl_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_chl_surface.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(testbk10000, aes(x = chl_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_chl_depth.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(testbk10000, aes(x = o2_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_o2_surface.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

ggplot(testbk10000, aes(x = o2_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_o2_depth.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

3d plot background points

bck2014_06_3d
No trace type specified:
  Based on info supplied, a 'scatter3d' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
No scatter3d mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
No trace type specified:
  Based on info supplied, a 'scatter3d' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
No scatter3d mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode

2d plot background points

bck10000points_2d <- plot(x= testbk10000$longitude_, y = testbk10000$latitude_m, xlab = "Longitude (meters)", ylab = "Latitude (meters")
dev.copy(png,"../output/env/background_point_check/bck10000points_2d.png") # to automatically save the plot to a png AND show it inline
png 
  3 
dev.off() # stops automatic saving of the plot to a png
png 
  2 

---
title: "background points - is more better?"
author: "Samantha Andrews"
output: 
  html_notebook: 
editor_options: 
  chunk_output_type: inline
---

# Overview
quick test to see what happens to distribution of env. variables if add more background points (is 10k enough or do I need more)

A note to anyone who might happen to stumble across this... I am a beginner in R and have had no exposure to similar languages. I don't know what I'm doing. The code herein is unlikely to be elegant and there are probably more efficient ways of running the code.

Built with 'r getRversion()'.

# Package dependencies
You can load them using the following code which uses a function called [ipak](https://gist.github.com/stevenworthington/3178163). 
Note this function checks to see if the packages are installed first.
The "include=FALSE" supresses the package installation text appearing in the document...
```{r pre-install packages, include=FALSE}
packages <- c("ncdf4", "raster", "ggplot2", "plotly") 
source("../src/ipak.R")
ipak(packages)
```

load the raw background.csv file with all points to randomly extract points from (../output/env/unique_cell_centroid_lonlat_nafo2_depth.csv)

```{r}
testbglist <- read.csv("../output/env/unique_cell_centroid_lonlat_nafo2_depth.csv", header = TRUE)
head(testbglist)
```

```{r}
backobsno <- nrow(testbglist)
backobsno
```



# the inefficent loop



This loop runs through the netcdf files and then looks for which rows in data_aea it should extract the value to point from, and at what depth (netcDF layer)

Start with 2007_10 data

```{r}
strt <- Sys.time() #get the start time

xy <- testbglist[ ,c("longitude_","latitude_m")] # This is to tell R where the coordinates are. Note that the column order needs to be longitude, latitude
testbglistsp <- SpatialPointsDataFrame(coords = xy, data = testbglist, proj4string = CRS("+proj=aea +lat_1=50 +lat_2=70 +lat_0=40 +lon_0=-60 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs")) # The CRS is used here is for the albers equal area projection.


netcdf_list <- list.files("../data/env/bktstncdf", pattern = '*.nc', full.names = TRUE) #true means the full path is included
no_netcdf <- length(netcdf_list) #for the loop - need to know how many files to cycle through
netcdf_name <- list.files("../data/env/bktstncdf", pattern = '*.nc', full.names = FALSE) #false means the path is not included
aea <- raster("../output/env/aea.tif") 
yr <- 2007  # a variable for the observation year
mth <- 10  # a variable for the observation month

for (i in 1:no_netcdf) {  
  print(netcdf_name[i]) #this just prints the name of the netCDF R is working one
  brkyr <- as.integer(sapply(strsplit(netcdf_name[i], "_"), "[[", 1)) # extracting the first part of the netcdf filename (which is the year)
  brkmth <- as.integer(sapply(strsplit(netcdf_name[i], "_"), "[[", 2)) # extracting the second part of the netcdf filename (which is the month)
  brkvar <- (sapply(strsplit(netcdf_name[i], "_"), "[[", 3)) # extracting the third part of the netcdf (inc.nc)
  temp_brick <- brick(netcdf_list[i], lvar = 4)
  temp_brick <- projectRaster(temp_brick, aea) 
    for (j in 1:nrow(testbglistsp)) {  
      de <- testbglistsp$depthlayerno[[j]]  # a variable for the observation depth layer
          if (brkyr == yr & brkmth == mth & brkvar == "temp.nc"){
              testbglistsp$temp_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$temp_depth[j] <- NA
              } else  
                testbglistsp$temp_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ])
          } else if (brkyr == yr & brkmth == mth & brkvar == "salinity.nc") {
              testbglistsp$salinity_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$salinity_depth[j] <- NA
              } else  
                testbglistsp$salinity_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "chl.nc") {
              testbglistsp$chl_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$chl_depth[j] <- NA
              } else  
                testbglistsp$chl_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "o2.nc") {
              testbglistsp$o2_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$o2_depth[j] <- NA
              } else  
                testbglistsp$o2_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "mlp.nc") {
              testbglistsp$mlp_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ])
          } else if (brkyr == yr & brkmth == mth & brkvar == "ssh.nc") {
              testbglistsp$ssh_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
            
          }
     
    }
}
write.csv(testbglistsp, "../data/env/background_point_check/200710_allbackgroundpoints.csv", row.names = FALSE)
test_back_df <- as.data.frame(testbglistsp)
print(Sys.time()-strt) #time it took to run
```

ok now create the first lot of random for 2007_10

```{r}
testbk10000 <- test_back_df[sample(nrow(test_back_df), 10000), ]  #where 10000 = number of rows to sample (large sample as per maxent)
testbk20000 <- test_back_df[sample(nrow(test_back_df), 20000), ]  
testbk50000 <- test_back_df[sample(nrow(test_back_df), 50000), ]  
testbk100000 <- test_back_df[sample(nrow(test_back_df), 100000), ]  
testbk190000 <- test_back_df[sample(nrow(test_back_df), 190000), ]  
```



plot each variable against the different no of background points



```{r}
ggplot(testbk10000, aes(x = ssh_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Sea Surface Height Above Geoid (meters)")
dev.copy(png, "../output/env/background_point_check/200710_ssh_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = mlp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Mixed layer thickness (MLP) (meters)")
dev.copy(png,"../output/env/background_point_check/200710_mlp_back_no.png") # to automatically save the plot to a png AND show it inline 
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = temp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")  + labs(x = "Temperature at surface (kelvin)")
dev.copy(png,"../output/env/background_point_check/200710_temp_surface_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = temp_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Temperature at sampling depth (kelvin)")
dev.copy(png,"../output/env/background_point_check/200710_temp_depth_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = salinity_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Salinity at surface (kelvin)")
dev.copy(png,"../output/env/background_point_check/200710_salinity_surface_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = salinity_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Salinity at sampling depth (kelvin)")
dev.copy(png,"../output/env/background_point_check/200710_salinity_depth_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = chl_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Chlorophyll concentration at surface (mmol.m-3)")
dev.copy(png,"../output/env/background_point_check/200710_chl_surface_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = chl_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Chlorophyll concentration at sampling depth (mmol.m-3)")
dev.copy(png,"../output/env/background_point_check/200710_chl_depth_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = o2_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Dissolved oxygen concentration at surface (mmol.m-3)")
dev.copy(png,"../output/env/background_point_check/200710_o2_surface_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = o2_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Dissolved oxygen concentration at sampling depth (mmol.m-3)")
dev.copy(png,"../output/env/background_point_check/200710_o2_depth_back_no.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

Try again for another month - say 1999 02 AND again 2014 06

This time extract the values for all points and then subset

```{r}
strt <- Sys.time() #get the start time

xy <- testbglist[ ,c("longitude_","latitude_m")] # This is to tell R where the coordinates are. Note that the column order needs to be longitude, latitude
testbglistsp <- SpatialPointsDataFrame(coords = xy, data = testbglist, proj4string = CRS("+proj=aea +lat_1=50 +lat_2=70 +lat_0=40 +lon_0=-60 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs")) # The CRS is used here is for the albers equal area projection.


netcdf_list <- list.files("../data/env/bktstncdf", pattern = '*.nc', full.names = TRUE) #true means the full path is included
no_netcdf <- length(netcdf_list) #for the loop - need to know how many files to cycle through
netcdf_name <- list.files("../data/env/bktstncdf", pattern = '*.nc', full.names = FALSE) #false means the path is not included
aea <- raster("../output/env/aea.tif") 
yr <- 1999  # a variable for the observation year
mth <- 02  # a variable for the observation month

for (i in 1:no_netcdf) {  
  print(netcdf_name[i]) #this just prints the name of the netCDF R is working one
  brkyr <- as.integer(sapply(strsplit(netcdf_name[i], "_"), "[[", 1)) # extracting the first part of the netcdf filename (which is the year)
  brkmth <- as.integer(sapply(strsplit(netcdf_name[i], "_"), "[[", 2)) # extracting the second part of the netcdf filename (which is the month)
  brkvar <- (sapply(strsplit(netcdf_name[i], "_"), "[[", 3)) # extracting the third part of the netcdf (inc.nc)
  temp_brick <- brick(netcdf_list[i], lvar = 4)
  temp_brick <- projectRaster(temp_brick, aea) 
    for (j in 1:nrow(testbglistsp)) {  
      de <- testbglistsp$depthlayerno[[j]]  # a variable for the observation depth layer
          if (brkyr == yr & brkmth == mth & brkvar == "temp.nc"){
              testbglistsp$temp_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$temp_depth[j] <- NA
              } else  
                testbglistsp$temp_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ])
          } else if (brkyr == yr & brkmth == mth & brkvar == "salinity.nc") {
              testbglistsp$salinity_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$salinity_depth[j] <- NA
              } else  
                testbglistsp$salinity_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "chl.nc") {
              testbglistsp$chl_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$chl_depth[j] <- NA
              } else  
                testbglistsp$chl_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "o2.nc") {
              testbglistsp$o2_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$o2_depth[j] <- NA
              } else  
                testbglistsp$o2_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "mlp.nc") {
              testbglistsp$mlp_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ])
          } else if (brkyr == yr & brkmth == mth & brkvar == "ssh.nc") {
              testbglistsp$ssh_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
            
          }
     
    }
}
write.csv(testbglistsp, "../data/env/background_point_check/199902_allbackgroundpoints.csv", row.names = FALSE)
test_back_df <- as.data.frame(testbglistsp)
print(Sys.time()-strt) #time it took to run
```

ok now create the first lot of random for 1999_02

```{r}
testbk10000 <- test_back_df[sample(nrow(test_back_df), 10000), ]  #where 10000 = number of rows to sample (large sample as per maxent)
testbk20000 <- test_back_df[sample(nrow(test_back_df), 20000), ]  
testbk50000 <- test_back_df[sample(nrow(test_back_df), 50000), ]  
testbk100000 <- test_back_df[sample(nrow(test_back_df), 100000), ]  
testbk190000 <- test_back_df[sample(nrow(test_back_df), 190000), ]  
```

and plot


```{r}
ggplot(testbk10000, aes(x = ssh_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Sea Surface Height Above Geoid (meters)")
dev.copy(png,"../output/env/background_point_check/199902_ssh_back.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = mlp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")  + labs(x = "Mixed layer thickness (MLP) (meters)")
dev.copy(png,"../output/env/background_point_check/199902_mlp.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = temp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Temperature at surface (kelvin)")
dev.copy(png,"../output/env/background_point_check/199902_temp_surface.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = temp_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Temperature at sampling depth (kelvin)")
dev.copy(png,"../output/env/background_point_check/199902_temp_depth.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = salinity_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/199902_salinity_surface.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = salinity_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/199902_salinity_depth.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = chl_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/199902_chl_surface.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = chl_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk1000001999 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/199902_chl_depth.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = o2_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk500001999 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/199902_o2_surface.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = o2_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk500001999 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/199902_o2_depth.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```


And now 2014_06


```{r}
strt <- Sys.time() #get the start time

xy <- testbglist[ ,c("longitude_","latitude_m")] # This is to tell R where the coordinates are. Note that the column order needs to be longitude, latitude
testbglistsp <- SpatialPointsDataFrame(coords = xy, data = testbglist, proj4string = CRS("+proj=aea +lat_1=50 +lat_2=70 +lat_0=40 +lon_0=-60 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs")) # The CRS is used here is for the albers equal area projection.


netcdf_list <- list.files("../data/env/bktstncdf", pattern = '*.nc', full.names = TRUE) #true means the full path is included
no_netcdf <- length(netcdf_list) #for the loop - need to know how many files to cycle through
netcdf_name <- list.files("../data/env/bktstncdf", pattern = '*.nc', full.names = FALSE) #false means the path is not included
aea <- raster("../output/env/aea.tif") 
yr <- 2014  # a variable for the observation year
mth <- 06  # a variable for the observation month

for (i in 1:no_netcdf) {  
  print(netcdf_name[i]) #this just prints the name of the netCDF R is working one
  brkyr <- as.integer(sapply(strsplit(netcdf_name[i], "_"), "[[", 1)) # extracting the first part of the netcdf filename (which is the year)
  brkmth <- as.integer(sapply(strsplit(netcdf_name[i], "_"), "[[", 2)) # extracting the second part of the netcdf filename (which is the month)
  brkvar <- (sapply(strsplit(netcdf_name[i], "_"), "[[", 3)) # extracting the third part of the netcdf (inc.nc)
  temp_brick <- brick(netcdf_list[i], lvar = 4)
  temp_brick <- projectRaster(temp_brick, aea) 
    for (j in 1:nrow(testbglistsp)) {  
      de <- testbglistsp$depthlayerno[[j]]  # a variable for the observation depth layer
          if (brkyr == yr & brkmth == mth & brkvar == "temp.nc"){
              testbglistsp$temp_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$temp_depth[j] <- NA
              } else  
                testbglistsp$temp_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ])
          } else if (brkyr == yr & brkmth == mth & brkvar == "salinity.nc") {
              testbglistsp$salinity_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$salinity_depth[j] <- NA
              } else  
                testbglistsp$salinity_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "chl.nc") {
              testbglistsp$chl_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$chl_depth[j] <- NA
              } else  
                testbglistsp$chl_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "o2.nc") {
              testbglistsp$o2_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
              if (is.na(de)){
                testbglistsp$o2_depth[j] <- NA
              } else  
                testbglistsp$o2_depth[j] <- extract(x=temp_brick[[de]], y = testbglistsp[j, ]) 
          } else if (brkyr == yr & brkmth == mth & brkvar == "mlp.nc") {
              testbglistsp$mlp_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ])
          } else if (brkyr == yr & brkmth == mth & brkvar == "ssh.nc") {
              testbglistsp$ssh_surface[j] <- extract(x=temp_brick[[1]], y = testbglistsp[j, ]) 
            
          }
     
    }
}
write.csv(testbglistsp, "../data/env/background_point_check/201406_allbackgroundpoints.csv", row.names = FALSE)
test_back_df <- as.data.frame(testbglistsp)
print(Sys.time()-strt) #time it took to run
```

ok now create the first lot of random for 2014_06

```{r}
testbk10000 <- test_back_df[sample(nrow(test_back_df), 10000), ]  #where 10000 = number of rows to sample (large sample as per maxent)
testbk20000 <- test_back_df[sample(nrow(test_back_df), 20000), ]  
testbk50000 <- test_back_df[sample(nrow(test_back_df), 50000), ]  
testbk100000 <- test_back_df[sample(nrow(test_back_df), 100000), ]  
testbk190000 <- test_back_df[sample(nrow(test_back_df), 190000), ]  
```

and plot


```{r}
ggplot(testbk10000, aes(x = ssh_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink") + labs(x = "Sea Surface Height Above Geoid (meters)")
dev.copy(png,"../output/env/background_point_check/201406_ssh.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = mlp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")  + labs(x = "Mixed layer thickness (MLP) (meters)")
dev.copy(png,"../output/env/background_point_check/201406_mlp.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = temp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")+ labs(x = "Temperature at surface (kelvin)")
dev.copy(png,"../output/env/background_point_check/201406_temp_surface.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = temp_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")  + labs(x = "Temperature at sampling depth (kelvin)")
dev.copy(png,"../output/env/background_point_check/201406_temp_depth.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = salinity_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_salinity_surface.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = salinity_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_salinity_depth.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = chl_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_chl_surface.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = chl_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_chl_depth.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = o2_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_o2_surface.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(testbk10000, aes(x = o2_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=testbk20000 , na.rm = TRUE, colour = "blue") + geom_density(data=testbk50000 , na.rm = TRUE, colour = "green") + geom_density(data=testbk100000 , na.rm = TRUE, colour = "orange") + geom_density(data=testbk190000 , na.rm = TRUE, colour = "pink")
dev.copy(png,"../output/env/background_point_check/201406_o2_depth.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```



# 3d plot background points

```{r}
bck2014_06_3d <- plot_ly(x= testbk10000$longitude_, y = testbk10000$latitude_m, z = testbk10000$depthlayerno)
bck2014_06_3d
```

# 2d plot background points

```{r}
bck10000points_2d <- plot(x= testbk10000$longitude_, y = testbk10000$latitude_m, xlab = "Longitude (meters)", ylab = "Latitude (meters")
dev.copy(png,"../output/env/background_point_check/bck10000points_2d.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```